@comment{Survey paper on Web data clustering, EDBT 2004 Workshops volume (LNCS 3268). Names are stored in "Last, First" form; the redundant organization field (identical to publisher) was dropped.}
@inproceedings{conf/edbtw/VakaliPD04,
	author = {Vakali, Athena and Pokorny, Jaroslav and Dalamagas, Theodore},
	editor = {Lindner, Wolfgang and Mesiti, Marco and T{\"u}rker, Can and Tzitzikas, Yannis and Vakali, Athena},
	title = {An Overview of {Web} Data Clustering Practices},
	booktitle = {EDBT Workshops},
	series = {Lecture Notes in Computer Science},
	volume = {3268},
	pages = {597--606},
	publisher = {Springer},
	year = {2004},
	isbn = {3-540-23305-9},
	keywords = {Web Data Clustering},
	abstract = {Clustering is a challenging topic in the area of Web data management. Various forms of clustering are required in a wide range of applications, including finding mirrored Web pages, detecting copyright violations, and reporting search results in a structured way. Clustering can either be performed once offline, (independently to search queries), or online (on the results of search queries). Important efforts have focused on mining Web access logs and to cluster search engine results on the fly. Online methods based on link structure and text have been applied successfully to finding pages on related topics. This paper presents an overview of the most popular methodologies and implementations in terms of clustering either Web users or Web sources and presents a survey about current status and future trends in clustering employed over the Web.}
}
@comment{NOTE(review): entry 1809 has the same title, year and abstract as conf/edbtw/VakaliPD04 and appears to be a duplicate of it. The required author and booktitle fields were copied from that entry -- confirm, and consider merging the two keys.}
@inproceedings{1809,
	author = {Vakali, Athena and Pokorny, Jaroslav and Dalamagas, Theodore},
	title = {An Overview of {Web} Data Clustering Practices},
	booktitle = {EDBT Workshops},
	year = {2004},
	abstract = {Clustering is a challenging topic in the area of Web data management. Various forms of clustering are required in a wide range of applications, including finding mirrored Web pages, detecting copyright violations, and reporting search results in a structured way. Clustering can either be performed once offline, (independently to search queries), or online (on the results of search queries). Important efforts have focused on mining Web access logs and to cluster search engine results on the fly. Online methods based on link structure and text have been applied successfully to finding pages on related topics. This paper presents an overview of the most popular methodologies and implementations in terms of clustering either Web users or Web sources and presents a survey about current status and future trends in clustering employed over the Web.}
}
@comment{NOTE(review): entry 1842 lacks the author and booktitle fields that inproceedings requires; they are not recoverable from this file and must be supplied from the publication record. The abstract was repaired from a garbled text extraction (lost ligatures, hyphens and sentence punctuation) -- verify against the published abstract.}
@inproceedings{1842,
	title = {An Object-Based Approach for Effective {XML} Data Storage},
	year = {2001},
	abstract = {XML data storage is a critical issue due to the so-called I/O bottleneck problem emerged in nowadays computer systems. This paper presents an object-based XML data representation model towards effective XML data placement. The proposed representation of XML documents is analysed in a two-level scheme: the external level is based on the structure of a browsing graph whereas the internal level is supported by a tree-like structure. The main contribution of the paper is that it exploits the object data model in order to consider XML data dependencies, access frequencies and constraints. A simulation model has been developed in order to evaluate different XML data placement strategies and the impact of the proposed representation model in the overall storage process. XML data placement is applied on a tertiary storage subsystem by either constructive or iterative placement techniques. Three popular policies (the Organ-pipe, the Camel and the Simulated Annealing algorithms) have been considered and experiments have been carried out on synthetic workloads of XML data sets. The need of applying an XML data storage policy is apparent as indicated by the resulted improvements in seek and service times. The Simulated Annealing approach has been proven to outperform the other XML data placement strategies.}
}
